import os

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

# Display option: lift the column limit so printed DataFrames show every column.
pd.options.display.max_columns = None

# Data loading: read the dataset from the project's data directory.
data = pd.read_csv('./data/data.csv')

# Data exploration (disabled; uncomment for a quick look at the frame).
# print(data.info())
# print("*"*50)
# print(data.head(5))
# print("*"*50)
# print(data.describe())

# Data cleaning
# Three positional column groups: columns 2-11, 12-21 and 22-31.
# Presumably the "mean", "standard error" and "worst" feature groups --
# verify against the CSV header.
mean_feature = data.columns[2:12].tolist()
se_featrue = data.columns[12:22].tolist()
worst_featrue = data.columns[22:32].tolist()

# NOTE(review): this removes the ROW labelled 0 (the first record under the
# default integer index), not a column -- confirm that dropping it is intended.
data.drop(index=0, inplace=True)
# print(data["diagnosis"])

# Encode the diagnosis labels as integers: 'M' -> 1, 'B' -> 0.
# Values other than 'M' / 'B' become NaN because Series.map only translates
# keys present in the mapping.
data['diagnosis'] = data['diagnosis'].map({'M': 1, 'B': 0})

# Feature selection
# Make sure the output directory exists: savefig does not create missing
# parent directories and would otherwise raise FileNotFoundError.
os.makedirs("./result", exist_ok=True)

# Bar chart with the number of samples per diagnosis value.
# The column is passed explicitly as `x`: handing it over positionally is
# deprecated in seaborn 0.11, and from 0.12 on the first positional parameter
# is `data`, so the per-class counts would no longer be plotted.
sns.countplot(x=data['diagnosis'], label='Count')
# Save the current figure as an image file (PNG).
# print(data["diagnosis"])
plt.savefig("./result/tumor_diagnosis.png")

# Heat map of the pairwise correlations between the mean_feature columns.
corr = data[mean_feature].corr()
# Open a new 14x14-inch figure so the heat map is not drawn on the count plot.
plt.figure(figsize=(14, 14))
# annot=True writes the correlation value inside each cell.
sns.heatmap(corr, annot=True)
plt.savefig("./result/tumor_diagnosis2.png")